import numpy as np
import pandas as pd
# Load the divorce spreadsheet into a DataFrame. The path is relative to the
# notebook's working directory; change DATA_PATH if the file lives elsewhere.
DATA_PATH = 'Downloads/divorce/divorce.xlsx'
df = pd.read_excel(DATA_PATH)
df.head()
| | Atr1 | Atr2 | Atr3 | Atr4 | Atr5 | Atr6 | Atr7 | Atr8 | Atr9 | Atr10 | ... | Atr46 | Atr47 | Atr48 | Atr49 | Atr50 | Atr51 | Atr52 | Atr53 | Atr54 | Class |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 2 | 2 | 4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 2 | 1 | 3 | 3 | 3 | 2 | 3 | 2 | 1 | 1 |
| 1 | 4 | 4 | 4 | 4 | 4 | 0 | 0 | 4 | 4 | 4 | ... | 2 | 2 | 3 | 4 | 4 | 4 | 4 | 2 | 2 | 1 |
| 2 | 2 | 2 | 2 | 2 | 1 | 3 | 2 | 1 | 1 | 2 | ... | 3 | 2 | 3 | 1 | 1 | 1 | 2 | 2 | 2 | 1 |
| 3 | 3 | 2 | 3 | 2 | 3 | 3 | 3 | 3 | 3 | 3 | ... | 2 | 2 | 3 | 3 | 3 | 3 | 2 | 2 | 2 | 1 |
| 4 | 2 | 2 | 1 | 1 | 1 | 1 | 0 | 0 | 0 | 0 | ... | 2 | 1 | 2 | 3 | 2 | 2 | 2 | 1 | 0 | 1 |
5 rows × 55 columns
# Answer coding: 0=Never, 1=Seldom, 2=Averagely, 3=Frequently, 4=Always.
# NOTE(review): presumably this coding applies to the Atr* columns — confirm
# against the dataset description.
# Summarise column names, non-null counts and dtypes.
df.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 170 entries, 0 to 169 Data columns (total 55 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Atr1 170 non-null int64 1 Atr2 170 non-null int64 2 Atr3 170 non-null int64 3 Atr4 170 non-null int64 4 Atr5 170 non-null int64 5 Atr6 170 non-null int64 6 Atr7 170 non-null int64 7 Atr8 170 non-null int64 8 Atr9 170 non-null int64 9 Atr10 170 non-null int64 10 Atr11 170 non-null int64 11 Atr12 170 non-null int64 12 Atr13 170 non-null int64 13 Atr14 170 non-null int64 14 Atr15 170 non-null int64 15 Atr16 170 non-null int64 16 Atr17 170 non-null int64 17 Atr18 170 non-null int64 18 Atr19 170 non-null int64 19 Atr20 170 non-null int64 20 Atr21 170 non-null int64 21 Atr22 170 non-null int64 22 Atr23 170 non-null int64 23 Atr24 170 non-null int64 24 Atr25 170 non-null int64 25 Atr26 170 non-null int64 26 Atr27 170 non-null int64 27 Atr28 170 non-null int64 28 Atr29 170 non-null int64 29 Atr30 170 non-null int64 30 Atr31 170 non-null int64 31 Atr32 170 non-null int64 32 Atr33 170 non-null int64 33 Atr34 170 non-null int64 34 Atr35 170 non-null int64 35 Atr36 170 non-null int64 36 Atr37 170 non-null int64 37 Atr38 170 non-null int64 38 Atr39 170 non-null int64 39 Atr40 170 non-null int64 40 Atr41 170 non-null int64 41 Atr42 170 non-null int64 42 Atr43 170 non-null int64 43 Atr44 170 non-null int64 44 Atr45 170 non-null int64 45 Atr46 170 non-null int64 46 Atr47 170 non-null int64 47 Atr48 170 non-null int64 48 Atr49 170 non-null int64 49 Atr50 170 non-null int64 50 Atr51 170 non-null int64 51 Atr52 170 non-null int64 52 Atr53 170 non-null int64 53 Atr54 170 non-null int64 54 Class 170 non-null int64 dtypes: int64(55) memory usage: 73.2 KB
import seaborn as sns

# A pair plot over every numeric column would draw roughly a 54x54 grid of
# panels here, which is very slow to render and impossible to read. Plot only
# the few features whose correlation with the label is strongest instead.
N_PAIRPLOT_FEATURES = 5
top_features = (
    df.corr()['Class']
    .drop('Class')                      # the label itself is not a feature
    .abs()                              # strength matters, not direction
    .nlargest(N_PAIRPLOT_FEATURES)
    .index
    .tolist()
)
sns.pairplot(df, vars=top_features, hue='Class')
<seaborn.axisgrid.PairGrid at 0x7f82180933a0>
# Bar chart of the number of rows per Class label (class-balance check).
class_counts = df['Class'].value_counts()
class_counts.plot.bar()
<AxesSubplot:>
from sklearn.model_selection import train_test_split

# Target is the Class column; every remaining column is used as a feature.
y = df['Class']
x = df.drop(columns=['Class'])

# Hold out 33% of the rows for testing; random_state pins the shuffle.
X_train, X_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.33,
    random_state=0,
)
from sklearn.preprocessing import StandardScaler

# Feature scaling: learn the statistics on the training split only, then
# apply that same fitted transform to the test split.
# NOTE(review): the model cells visible below are fit and evaluated on
# X_train / X_test and never read trainning_scaled / test_scaled — confirm
# whether that is intended.
sc = StandardScaler()
sc.fit(X_train)
trainning_scaled = sc.transform(X_train)
test_scaled = sc.transform(X_test)
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

# Train the polynomial-kernel SVM on standardized features. Bundling the
# scaler with the classifier means the scaling is fitted on the training data
# inside .fit() and re-applied automatically inside .predict(), so callers
# keep passing the raw X_train / X_test frames.
svmmodel = make_pipeline(
    StandardScaler(),
    SVC(kernel='poly', random_state=0),
)
svmmodel.fit(X_train, y_train)
SVC(kernel='poly', random_state=0)
from sklearn.metrics import accuracy_score

# Predict both splits with the SVM, then report accuracy for each.
y_predict_train = svmmodel.predict(X_train)
y_predict_test = svmmodel.predict(X_test)

train_acc = accuracy_score(y_train, y_predict_train)
test_acc = accuracy_score(y_test, y_predict_test)
print("trainning acc= ", train_acc)
print("test acc= ", test_acc)
trainning acc= 0.9823008849557522 test acc= 0.9649122807017544
from sklearn.metrics import confusion_matrix

# Test-split confusion matrix: rows are true labels, columns are predictions.
cn = confusion_matrix(y_true=y_test, y_pred=y_predict_test)
print(cn)
[[29 0] [ 2 26]]
from sklearn.metrics import f1_score

# F1 score of the current test-split predictions.
f1_score(y_true=y_test, y_pred=y_predict_test)
0.962962962962963
from sklearn.tree import DecisionTreeClassifier

# Tree induction is randomized (feature order and tie-breaking between equally
# good splits), so pin the seed to make the reported scores reproducible —
# matching the random_state=0 used for the split and the other seeded models.
DTC = DecisionTreeClassifier(random_state=0)
DTC.fit(X_train, y_train)
DecisionTreeClassifier()
from sklearn.metrics import accuracy_score

# Predict both splits with the decision tree, then report accuracy for each.
y_predict_train = DTC.predict(X_train)
y_predict_test = DTC.predict(X_test)

train_acc = accuracy_score(y_train, y_predict_train)
test_acc = accuracy_score(y_test, y_predict_test)
print("trainning acc= ", train_acc)
print("test acc= ", test_acc)
trainning acc= 1.0 test acc= 0.9473684210526315
from sklearn.metrics import confusion_matrix

# Test-split confusion matrix: rows are true labels, columns are predictions.
cn = confusion_matrix(y_true=y_test, y_pred=y_predict_test)
print(cn)
[[28 1] [ 2 26]]
from sklearn.metrics import f1_score

# F1 score of the current test-split predictions.
f1_score(y_true=y_test, y_pred=y_predict_test)
0.9454545454545454
from sklearn.ensemble import RandomForestClassifier

# A random forest bootstraps rows and samples features at random, so pin the
# seed to make the reported scores reproducible — matching the random_state=0
# used for the split and the other seeded models.
RF = RandomForestClassifier(random_state=0)
RF.fit(X_train, y_train)
RandomForestClassifier()
from sklearn.metrics import accuracy_score

# Predict both splits with the random forest, then report accuracy for each.
y_predict_train = RF.predict(X_train)
y_predict_test = RF.predict(X_test)

train_acc = accuracy_score(y_train, y_predict_train)
test_acc = accuracy_score(y_test, y_predict_test)
print("trainning acc= ", train_acc)
print("test acc= ", test_acc)
trainning acc= 1.0 test acc= 0.9649122807017544
from sklearn.metrics import confusion_matrix

# Test-split confusion matrix: rows are true labels, columns are predictions.
cn = confusion_matrix(y_true=y_test, y_pred=y_predict_test)
print(cn)
[[29 0] [ 2 26]]
from sklearn.metrics import f1_score

# F1 score of the current test-split predictions.
f1_score(y_true=y_test, y_pred=y_predict_test)
0.962962962962963
from sklearn.neighbors import KNeighborsClassifier

# 3-nearest-neighbours classifier trained on the training split.
# fit() returns the estimator itself, so construction and fitting are chained.
KNN = KNeighborsClassifier(n_neighbors=3).fit(X_train, y_train)
KNN
KNeighborsClassifier(n_neighbors=3)
from sklearn.metrics import accuracy_score

# Predict both splits with the KNN model, then report accuracy for each.
y_predict_train = KNN.predict(X_train)
y_predict_test = KNN.predict(X_test)

train_acc = accuracy_score(y_train, y_predict_train)
test_acc = accuracy_score(y_test, y_predict_test)
print("trainning acc= ", train_acc)
print("test acc= ", test_acc)
trainning acc= 0.9823008849557522 test acc= 0.9649122807017544
from sklearn.metrics import confusion_matrix

# Test-split confusion matrix: rows are true labels, columns are predictions.
cn = confusion_matrix(y_true=y_test, y_pred=y_predict_test)
print(cn)
[[29 0] [ 2 26]]
from sklearn.metrics import f1_score

# F1 score of the current test-split predictions.
f1_score(y_true=y_test, y_pred=y_predict_test)
0.962962962962963
from sklearn.naive_bayes import GaussianNB

# Gaussian naive Bayes with default settings, trained on the training split.
# fit() returns the estimator itself, so construction and fitting are chained.
NB = GaussianNB().fit(X_train, y_train)
NB
GaussianNB()
from sklearn.metrics import accuracy_score

# Predict both splits with naive Bayes, then report accuracy for each.
y_predict_train = NB.predict(X_train)
y_predict_test = NB.predict(X_test)

train_acc = accuracy_score(y_train, y_predict_train)
test_acc = accuracy_score(y_test, y_predict_test)
print("trainning acc= ", train_acc)
print("test acc= ", test_acc)
trainning acc= 0.9911504424778761 test acc= 0.9473684210526315
from sklearn.metrics import confusion_matrix

# Test-split confusion matrix: rows are true labels, columns are predictions.
cn = confusion_matrix(y_true=y_test, y_pred=y_predict_test)
print(cn)
[[28 1] [ 2 26]]
from sklearn.metrics import f1_score

# F1 score of the current test-split predictions.
f1_score(y_true=y_test, y_pred=y_predict_test)
0.9454545454545454
from sklearn.linear_model import LogisticRegression

# Logistic regression with a fixed seed, trained on the training split.
# fit() returns the estimator itself, so construction and fitting are chained.
LG = LogisticRegression(random_state=0).fit(X_train, y_train)
LG
LogisticRegression(random_state=0)
from sklearn.metrics import accuracy_score

# Predict both splits with logistic regression, then report accuracy for each.
y_predict_train = LG.predict(X_train)
y_predict_test = LG.predict(X_test)

train_acc = accuracy_score(y_train, y_predict_train)
test_acc = accuracy_score(y_test, y_predict_test)
print("trainning acc= ", train_acc)
print("test acc= ", test_acc)
trainning acc= 1.0 test acc= 0.9649122807017544
from sklearn.metrics import confusion_matrix

# Test-split confusion matrix: rows are true labels, columns are predictions.
cn = confusion_matrix(y_true=y_test, y_pred=y_predict_test)
print(cn)
[[29 0] [ 2 26]]
from sklearn.metrics import f1_score

# F1 score of the current test-split predictions.
f1_score(y_true=y_test, y_pred=y_predict_test)
0.962962962962963